from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from load_data import data

# Load the data set to cluster.
# NOTE: this rebinds the name `data`, shadowing the loader function imported
# from `load_data`; the loader cannot be called again after this line.
data = data()

# Elbow method: fit K-Means once per candidate k and record the within-cluster
# sum of squared errors (SSE, exposed by scikit-learn as `inertia_`).
# The candidate range is defined once so that the fitting loop and the plot's
# x-axis can never drift apart.
K_VALUES = range(1, 20)

SSE = []  # SSE of the fitted model for each k, in the same order as K_VALUES
for k in K_VALUES:
    # A fixed seed makes the centroid initialisation, and therefore the
    # resulting curve, reproducible from run to run.
    kmeans = KMeans(n_clusters=k, random_state=0)
    kmeans.fit(data)
    SSE.append(kmeans.inertia_)

# NOTE(review): the axis labels contain CJK characters; they will only render
# if the active matplotlib font provides those glyphs -- confirm on the target
# machine.
plt.xlabel('簇数量k')
plt.ylabel('簇内误方差SSE')
plt.plot(K_VALUES, SSE, 'o-')  # 'o-': circle markers joined by a solid line
# Without an explicit show() the figure is never displayed when this file is
# run as a plain script.
plt.show()
